*** PREPARE UK DATA FROM MEDIA CORPUS AND COMBINE W/PARLSPEECH DATA ***

*** 	This code aggregates article-level visibility...		***
*** 	...to visibility at the level of MP X month 			***

***		It then adds metadata and combines everything into...	***
***		...monthly & quarterly datasets ready for analysis 		***




********************************************************************************
**************** Aggregate media data, creating newscount **********************

** Read the article-level media file
use "article_level_unitedkingdom.dta", clear

** Collapse to one row per actor_id X yearmonth:
** newscount is the within-cell sum of actor_occ
collapse (sum) newscount = actor_occ, by(actor_id yearmonth)

** Convert yearmonth to numeric if it is stored as a string, and give the
** MP identifier the name (actor) that the merges further down use as key
destring yearmonth, replace
rename actor_id actor

** Write the MP X month media file
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
**************** Combine newscount with ParlSpeech data ************************

** Start from the ParlSpeech MP X month file.
** The path uses forward slashes: Stata accepts them on Windows, macOS and
** Linux, whereas backslashes are only understood on Windows.
use "../Prepare ParlSpeech data/parlspeech_mp_X_month.dta", clear

** Drop 6 duplicate observations (due to MPs with two party ids within same month),
** then report on duplicates again to confirm that none remain
duplicates drop yearmonth actor, force
duplicates report yearmonth actor

** Attach the media counts by month and MP. Rows found only in the media file
** (mainly media data from outside our time period) are discarded by keep(master match)
merge 1:1 yearmonth actor using "mp_X_month uk.dta", keep(master match)

** MP-months without a matching media row had no media presence:
** turn the missing newscount into a meaningful 0
replace newscount = 0 if _merge == 1
drop _merge

** Overwrite the MP X month file with the combined data
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
**************** Add age, years of experience and gender ************************

** Reload the article-level media file to extract MP characteristics
use "article_level_unitedkingdom.dta", clear

** One row per MP, taking the first recorded gender (stored as fem) and year_of_birth.
** NOTE(review): (first) uses the first row in the current order even when it is missing.
** Presumably both variables are constant within actor_id - confirm
collapse (first) fem = gender year_of_birth, by(actor_id)

** Value labels for the gender variable
label define fem 0 "Men" 1 "Women"
label values fem fem

** Add the MP experience file, retaining only MPs found in both sources
merge 1:1 actor_id using "mps_first_elected_uk.dta", keep(match) nogenerate

** Use the identifier name of the MP X month file
rename actor_id actor

** Store the MP-level lookup file
save "gender_and_experience.dta", replace

** Attach the MP-level characteristics to every MP X month row,
** keeping only rows whose actor is found in both files
use "mp_X_month uk.dta", clear
merge m:1 actor using "gender_and_experience.dta"
drop if _merge != 3
drop _merge

** Age is year minus year_of_birth, experience is year minus first_year
generate age    = year - year_of_birth
generate mp_exp = year - first_year

** The two source variables are no longer needed
drop year_of_birth first_year

** Save
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
**************** Add meta data on parties, cabinets...  ************************

** Load the cabinet/party meta data and harmonise variable names
use "cabinet party meta data uk.dta", clear
rename party_seat_share   seat_share
rename cabinet_seat_share share_of_parliament
rename election_month     electionperiod

** Party extremity: absolute distance of party_left_right from 5
generate party_extremity = abs(party_left_right - 5)

** Right-leaning cabinet dummy: 1 if the weighted cabinet left-right score is
** above 5, 0 otherwise, missing if the score is missing.
** The former recode rules (1/5=0)(5.1/10=1) left every score strictly between
** 5 and 5.1 unrecoded (as well as a 5.1 stored as float, which compares below
** the double-precision 5.1), so the raw score ended up in the dummy.
** A plain threshold has no such gap.
** NOTE(review): scores below 1 or above 10 were also left unrecoded before and
** are now coded 0 and 1 respectively - presumably none exist, confirm
generate byte cabinet_right = cabinet_left_right_weighted > 5 ///
	if !missing(cabinet_left_right_weighted)

** Drop variables that are not carried into the analysis files
drop cabinet_id year cabinet_seats parliament_seats_total cabinet_left_right_weighted ///
	cabinet_left_right_unweighted election_id year_election yearmonth_election party_left_right

** Merge onto the MP X month file by month and party, keeping matched rows only
merge 1:m yearmonth party_id using "mp_X_month uk.dta", keep(match) nogenerate

** Save
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
**************** Add electoral safety, turnout, electorate, distance ***********

** Add the constituency/election meta data by month and MP,
** retaining only MP-months present in both files
use "mp_X_month uk.dta", clear
merge m:1 yearmonth actor using "constituency election meta uk.dta"
keep if _merge == 3
drop _merge

** Write the result back to the MP X month file
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
**************** Add months from election tally  *******************************

** Open the election tally file (the 1:m merge below requires one row per yearmonth)
use "tally_months_uk.dta", clear

** Spread the tally over all MP rows of the same month, keeping matched rows only
merge 1:m yearmonth using "mp_X_month uk.dta", keep(match) nogenerate

** Write the result back to the MP X month file
save "mp_X_month uk.dta", replace
********************************************************************************




********************************************************************************
***************** Monthly file ready for analysis, save ************************

use "mp_X_month uk.dta", clear

** Rename (not recode) majority_votes to ease comparability of output from NO and UK analysis
rename majority_votes electoral_safety

** Replace the string actor identifier by a labelled numeric variable of the same name
encode actor, gen(actor_enc)
drop actor
rename actor_enc actor

** Monthly file ready for analysis, save.
** Forward slashes keep the path usable on Windows, macOS and Linux
** (backslashes are only understood on Windows)
save "../Files for analysis/monthly data uk", replace
********************************************************************************




********************************************************************************
**************** Create quarterly dataset  *************************************

** Load monthly dataset.
** The use command takes the option clear. The former option replace is not
** allowed with use and made the do-file stop here.
** Forward slashes keep the path usable on Windows, macOS and Linux.
use "../Files for analysis/monthly data uk", clear

** Create quarterly time variable from month and year
destring month, replace
gen qdate = qofd(mdy(month, 1, year))
format qdate %tq

** (first) takes the first row of each group in the current sort order, so sort
** chronologically within MP to make that row the earliest month of the quarter
sort actor yearmonth

** Aggregate by actor and quarter:
** counts are summed, covariates take the first monthly value,
** electionperiod takes the maximum over the months of the quarter
collapse ///
	(sum) newscount legspeech ///
	(first) party_id year electoral_safety ///
	distance turnout_perc electorate fem age mp_exp months_from_election ///
	seat_share share_of_parliament cabinet_right ///
	cabinet_party party_extremity  ///
	(max) electionperiod, ///
	by(actor qdate)

** Labelling gender variable (attach the fem value label to the collapsed variable)
label values fem fem

** Lagged newscount: value of the previous quarter for the same MP
tsset actor qdate
gen l_newscount = L.newscount

** Quarterly file ready for analysis, save
save "../Files for analysis/quarterly data uk", replace
********************************************************************************


